In [1]:
from IPython import parallel
clients = parallel.Client(profile='parallel')
In [2]:
print clients.ids
print "Total %i cores"%(len(clients.ids))
In [3]:
%%px --local
import sys
sys.path.append("\\\\DAP-NAS\\work\\CNOP")
import cPickle as pickle
import numpy as np
import pandas as pd
from statsmodels.tsa.arima_process import arma_generate_sample
import matplotlib.pyplot as plt
import numpy as np
from CNOP import CNOP
import winsound
def threshold(x,thresholds=[],values=[-1,0,1]):
for threshold,val in zip(thresholds,values):
if x < threshold:
return val
return values[-1]
import time
from itertools import repeat
import os
from datetime import datetime
import numpy as np
import numpy.linalg as linalg
In [4]:
%%px --local
x2000 = pd.read_csv("\\\\DAP-NAS\\work\\CNOP\\x2000.csv", delimiter=";")
In [5]:
%%px --local
def recalcSE(path, fun, chunksize=1, **kwargs):
print "Starting SE on %i cores, \"%s\""\
%(len(clients.ids), fun.__name__)
print "NO BACKUP"
view = clients.load_balanced_view()
def doJob(i):
fun, distortion, res, kwargs=i
try:
local_res = fun(distortion=distortion, res=res,**kwargs)
#pickle.dump(local_res, open( filename, "wb" ) )
return local_res, ""
except Exception, e:
#pickle.dump(e, open( filename, "wb" ) )
return Exception, e, ""
dump = pickle.load(file(path))
print "Results loaded"
try:
job = [(fun, i[0][1], i[0][0], kwargs) for i in dump if type(i[0])!=type]
print "Job type 1, %i" % len(job)
except KeyError:
job = [(fun, i[1], i[0], kwargs) for i in dump if type(i)==tuple]
print "Job type 2, %i" % len(job)
ar = view.map_async(doJob, job, chunksize=chunksize)
yield ar
ar.wait_interactive()
results = ar.get()
print "DONE!"
yield results
In [6]:
%%px --local
def full_overlapSE(distortion, res, df, give_distortion=True):
########################################################
##### This code is for full-overlap case of CNOP########
########################################################
#PreSampling:
N=distortion.shape[1]
df = df[:N]
#df = x2000[:N]
### DGP generation
regime = pd.DataFrame()
beta, alpha = [0.6, 0.4, 0.8], [0.85, 1.55]
gammam, mum = [0.4, 0.3, 0.9], [-1.2, 0.07]
gammap, mup = [0.2, 0.8, 0.3], [1.28, 2.5]
####distortion = np.random.randn(3,N)
regime["xbeta"] = df.dot(beta) + distortion[0]
regime['z-gammam'] = df.dot(gammam) + distortion[1]
regime['z+gammap'] = df.dot(gammap) + distortion[2]
regime['regime'] = regime['xbeta'].apply(lambda x: threshold(x,thresholds=alpha))
regime['Y-']=regime['z-gammam'].apply(lambda x: threshold(x, thresholds=mum,values=[-2,-1,0]))
regime['Y+']=regime['z+gammap'].apply(lambda x: threshold(x, thresholds=mup,values=[0,1,2]))
df['Y'] = 0
df['Y'] += np.where(regime['regime']==-1,regime['Y-'],0)
df['Y'] += np.where(regime['regime']==1,regime['Y+'],0)
###df is full data matrix
#Model starts here:
exog = df[["X1", "X2", "X3"]]
endog = df[["Y"]]
l = {0:df}
pan = pd.Panel(l)
y = pan.ix[:,:,['Y']]
x = pan.ix[:,:,["X1", "X2", "X3"]]
zminus = pan.ix[:,:,["X1", "X2", "X3"]]
zplus = pan.ix[:,:,["X1", "X2", "X3"]]
exog = {'x':x,'zplus':zplus,'zminus':zminus}
CNOP4 = CNOP(y,exog, model='CNOP',interest_step=1)
# Standard Errors as well
try:
res["se"] = CNOP4.se(res.x)
res['status'] = "OK"
except Exception, e:
print e
res['status'] = e
if give_distortion:
return res, distortion
else:
return res
In [14]:
%%px --local
def partial_overlapSE(distortion, res, df, give_distortion=True):
########################################################
##### This code is for full-overlap case of CNOP########
########################################################
#PreSampling:
N=distortion.shape[1]
df = df[:N]
#df = x2000[:N]
### DGP generation
regime = pd.DataFrame()
beta, alpha = [0.6, 0.4], [0.9, 1.5]
gammam, mum = [0.3, 0.9], [-0.67, 0.36]
gammap, mup = [0.2, 0.3], [0.02, 1.28]
####distortion = np.random.randn(3,N)
regime["xbeta"] = df[["X1", "X2"]].dot(beta) + distortion[0]
regime['z-gammam'] = df[["X1", "X3"]].dot(gammam) + distortion[1]
regime['z+gammap'] = df[["X2", "X3"]].dot(gammap) + distortion[2]
regime['regime'] = regime['xbeta'].apply(lambda x: threshold(x,thresholds=alpha))
regime['Y-']=regime['z-gammam'].apply(lambda x: threshold(x, thresholds=mum,values=[-2,-1,0]))
regime['Y+']=regime['z+gammap'].apply(lambda x: threshold(x, thresholds=mup,values=[0,1,2]))
df['Y'] = 0
df['Y'] += np.where(regime['regime']==-1,regime['Y-'],0)
df['Y'] += np.where(regime['regime']==1,regime['Y+'],0)
###df is full data matrix
#Model starts here:
exog = df[["X1", "X2", "X3"]]
endog = df[["Y"]]
l = {0:df}
pan = pd.Panel(l)
y = pan.ix[:,:,['Y']]
x = pan.ix[:,:,["X1", "X2"]]
zminus = pan.ix[:,:,["X1", "X3"]]
zplus = pan.ix[:,:,["X2", "X3"]]
exog = {'x':x,'zplus':zplus,'zminus':zminus}
CNOP4 = CNOP(y,exog, model='CNOP',interest_step=1)
# Standard Errors as well
try:
res["se"] = CNOP4.se(res.x)
res['status'] = "OK"
except Exception, e:
print e
res['status'] = e
if give_distortion:
return res, distortion
else:
return res
In [59]:
%%px --local
def no_overlapSE(distortion, res, df, give_distortion=True):
########################################################
##### This code is for full-overlap case of CNOP########
########################################################
#PreSampling:
N=distortion.shape[1]
df = df[:N]
#df = x2000[:N]
### DGP generation
regime = pd.DataFrame()
beta, alpha = [0.6], [0.95, 1.45]
gammam, mum = [0.9], [-1.22, 0.03]
gammap, mup = [0.8], [-0.03, 1.18]
#####distortion = np.random.randn(3,N)
regime["xbeta"] = df[["X1"]].dot(beta) + distortion[0]
regime['z-gammam'] = df[["X2"]].dot(gammam) + distortion[1]
regime['z+gammap'] = df[["X3"]].dot(gammap) + distortion[2]
regime['regime'] = regime['xbeta'].apply(lambda x: threshold(x,thresholds=alpha))
regime['Y-']=regime['z-gammam'].apply(lambda x: threshold(x, thresholds=mum,values=[-2,-1,0]))
regime['Y+']=regime['z+gammap'].apply(lambda x: threshold(x, thresholds=mup,values=[0,1,2]))
df['Y'] = 0
df['Y'] += np.where(regime['regime']==-1,regime['Y-'],0)
df['Y'] += np.where(regime['regime']==1,regime['Y+'],0)
###df is full data matrix
#Model starts here:
exog = df[["X1", "X2", "X3"]]
endog = df[["Y"]]
l = {0:df}
pan = pd.Panel(l)
y = pan.ix[:,:,['Y']]
x = pan.ix[:,:,["X1"]]
zminus = pan.ix[:,:,["X2"]]
zplus = pan.ix[:,:,["X3"]]
exog = {'x':x,'zplus':zplus,'zminus':zminus}
CNOP4 = CNOP(y,exog, model='CNOP',interest_step=1)
# Standard Errors as well
try:
res["se"] = CNOP4.se(res.x)
res['status'] = "OK"
except Exception, e:
print e
res['status'] = e
if give_distortion:
return res, distortion
else:
return res
This code section provides processing function
In [21]:
def process_dump(obj, res_real, cutpoints = (.2, .8)):
if type(obj) is str:
mc_res = pickle.load(file(obj))
else:
mc_res = obj
xs = np.array([item[0][0].x for item in mc_res if len(item)==2 and item[0][0].success
and linalg.norm(item[0][0].x, ord=np.inf) < 100
])
ses = np.array([item[0][0].se for item in mc_res
if len(item)==2 and item[0][0].success
and linalg.norm(item[0][0].x, ord=np.inf) < 100
and "se" in item[0][0].keys() #and not np.isnan(item[0][0].se).any() \
#and linalg.norm(item[0][0].se, ord=np.inf) < 100
])
ses = np.nan_to_num(ses)
xs = pd.DataFrame(xs)
ses = pd.DataFrame(ses)
rmse = ((res_real - xs) ** 2).mean().mean()
bias = (res_real - xs).mean().mean()
#a_ratio = (ses.mean()/xs.std()).mean()
#m_ratio = (ses.median()/xs.std()).mean()
a_ratio = (ses[(ses<ses.quantile(cutpoints[1]))&(ses>ses.quantile(cutpoints[0]))].mean() \
/ xs[(xs<xs.quantile(cutpoints[1]))&(xs>xs.quantile(cutpoints[0]))].std()).mean()
m_ratio = (ses[(ses<ses.quantile(cutpoints[1]))&(ses>ses.quantile(cutpoints[0]))].median() \
/ xs[(xs<xs.quantile(cutpoints[1]))&(xs>xs.quantile(cutpoints[0]))].std()).mean()
if type(obj) is str:
print "FILE: %s"%(obj.split("\\")[-1])
print "BIAS: %2.3f"%(bias)
print "RMSE: %2.3f"%(rmse)
print "A-ratio: %2.3f"%(a_ratio)
print "M-ratio: %2.3f"%(m_ratio)
print
print "XS len: %s" % len(xs)
print "SE len: %s" % len(ses)
#print "SE mean: %s "% ses.mean()
#print "XS variance: %s "% xs.std()
return xs, ses
In [20]:
beta, alpha = [0.6, 0.4, 0.8], [0.85, 1.55]
gammam, mum = [0.4, 0.3, 0.9], [-1.2, 0.07]
gammap, mup = [0.2, 0.8, 0.3], [1.28, 2.5]
res_real_full = beta+alpha+gammam+mum+gammap+mup
beta, alpha = [0.6, 0.4], [0.9, 1.5]
gammam, mum = [0.3, 0.9], [-0.67, 0.36]
gammap, mup = [0.2, 0.3], [0.02, 1.28]
res_real_partial = beta+alpha+gammam+mum+gammap+mup
beta, alpha = [0.6], [0.95, 1.45]
gammam, mum = [0.9], [-1.22, 0.03]
gammap, mup = [0.8], [-0.03, 1.18]
res_real_no = beta+alpha+gammam+mum+gammap+mup
In [33]:
dump = pickle.load(file("W:\\CNOP\\dumps\\MC 31.03-results\\3Full\\res250_CHECKED"))
In [14]:
distortion=dump[1234][0][1]
#N=distortion.shape[1]
res = dump[1234][0][0]
In [34]:
ii = [(i[1], i[0]) for i in dump if type(i)==tuple]
In [35]:
len(ii)
Out[35]:
In [96]:
for dist, r in zip(distortion, res):
N=distortion.shape[1]
In [71]:
full_overlapSE(distortion=distortion, res=res, df=x2000)
Out[71]:
In [299]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\3Full\\res150_CHECKED", full_overlapSE, df=x2000, chunksize=50)
In [300]:
item = next(genr)
In [301]:
res = next(genr)
In [302]:
len(res)
Out[302]:
In [23]:
process_dump(res, res_real_full)
#OLD before NaN -> 0
In [305]:
process_dump(res, res_real_full, cutpoints=(0.1, 0.9));
In [319]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\3Full\\res250_CHECKED", full_overlapSE, df=x2000, chunksize=50)
item250 = next(genr)
In [320]:
res250 = next(genr)
In [321]:
process_dump(res250, res_real_full, cutpoints=(0.1,0.9));
In [ ]:
pickle.dump(res250, file("W:\\CNOP\\dumps\\SE 06.04\\3Full\\res250_CHECKED", "w"))
In [311]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\3Full\\res500_CHECKED", full_overlapSE, df=x2000, chunksize=50)
item500 = next(genr)
In [312]:
res500 = next(genr)
In [280]:
res500 = pickle.load(file("W:\\CNOP\\dumps\\SE 06.04\\3Full\\res500_CHECKED"))
In [313]:
process_dump(res500, res_real_full, cutpoints=(0.1,0.9));
In [322]:
del res500
In [ ]:
pickle.dump(res500, file("W:\\CNOP\\dumps\\SE 06.04\\3Full\\res500_CHECKED", "w"))
In [325]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\3Full\\res1000_CHECKED", full_overlapSE, df=x2000, chunksize=50)
item1000 = next(genr)
In [326]:
res1000 = next(genr)
In [288]:
pickle.dump(res1000, file("W:\\CNOP\\dumps\\SE 06.04\\3Full\\res1000_CHECKED", "w"))
In [327]:
process_dump(res1000, res_real_full, cutpoints=(0.1,0.9));
In [328]:
del res1000
In [329]:
del item1000
In [56]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\2Partial\\res150_CHECKED", partial_overlapSE, df=x2000, chunksize=50)
item125 = next(genr)
In [57]:
res125 = next(genr)
In [58]:
process_dump(res125, res_real_partial, cutpoints=(0.001,0.9));
In [25]:
pickle.dump(res125, file("W:\\CNOP\\dumps\\SE 06.04\\2Partial\\res150_CHECKED", "w"))
del genr, res125, item125
In [26]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\2Partial\\res250_CHECKED", partial_overlapSE, df=x2000, chunksize=50)
item250 = next(genr)
In [27]:
res250 = next(genr)
In [45]:
process_dump(res250, res_real_partial, cutpoints=(0.001,0.9));
In [46]:
pickle.dump(res250, file("W:\\CNOP\\dumps\\SE 06.04\\2Partial\\res250_CHECKED", "w"))
del genr, res250, item250
In [47]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\2Partial\\res500_CHECKED", partial_overlapSE, df=x2000, chunksize=50)
item500 = next(genr)
In [48]:
res500 = next(genr)
In [50]:
process_dump(res500, res_real_partial, cutpoints=(0.001,0.9));
In [51]:
pickle.dump(res500, file("W:\\CNOP\\dumps\\SE 06.04\\2Partial\\res500_CHECKED", "w"))
del genr, res500, item500
In [52]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\2Partial\\res1000_CHECKED", partial_overlapSE, df=x2000, chunksize=50)
item1000 = next(genr)
In [53]:
res1000 = next(genr)
In [54]:
process_dump(res1000, res_real_partial, cutpoints=(0.001,0.9));
In [55]:
pickle.dump(res1000, file("W:\\CNOP\\dumps\\SE 06.04\\2Partial\\res1000_CHECKED", "w"))
del genr, res1000, item1000
In [60]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\1No+\\res150_CHECKED", no_overlapSE, df=x2000, chunksize=50)
item125 = next(genr)
In [61]:
res125 = next(genr)
In [71]:
process_dump(res125, res_real_no, cutpoints=(0.01,0.99));
In [65]:
pickle.dump(res125, file("W:\\CNOP\\dumps\\SE 06.04\\1No\\res150_CHECKED", "w"))
#del genr, res125, item125
In [66]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\1No+\\res250_CHECKED", no_overlapSE, df=x2000, chunksize=50)
item250 = next(genr)
In [67]:
res250 = next(genr)
In [72]:
process_dump(res250, res_real_no, cutpoints=(0.01,0.99));
In [73]:
pickle.dump(res250, file("W:\\CNOP\\dumps\\SE 06.04\\1No\\res250_CHECKED", "w"))
#del genr, res125, item125
In [74]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\1No+\\res500_CHECKED", no_overlapSE, df=x2000, chunksize=50)
item500 = next(genr)
In [75]:
res500 = next(genr)
In [76]:
process_dump(res500, res_real_no, cutpoints=(0.01,0.99));
In [78]:
genr = recalcSE("W:\\CNOP\\dumps\\MC 31.03-results\\1No+\\res1000_OK", no_overlapSE, df=x2000, chunksize=50)
item1000 = next(genr)
In [79]:
res1000 = next(genr)
In [82]:
process_dump(res1000, res_real_no, cutpoints=(0.01,0.99));
In [ ]:
In [14]:
res500 = pickle.load(file("W:\\CNOP\\dumps\\SE 06.04\\3Full\\res500_CHECKED"))
In [16]:
process_dump(res500, res_real_full)
In [92]:
len(res500)
Out[92]:
In [99]:
ses = np.array([item[0][0].se for item in res500
if len(item)==2 and linalg.norm(item[0][0].x, ord=np.inf) < 100 \
and "se" in item[0][0].keys() and not np.isnan(item[0][0].se).any() \
#and linalg.norm(item[0][0].se, ord=np.inf) < 100 ])
])
In [100]:
ses.shape[0]
Out[100]:
In [52]:
import pandas as pd
%matplotlib inline
In [50]:
df = pd.DataFrame(ses)
In [88]:
(df.median()/ses.std(axis=0))
Out[88]:
In [89]:
ses.std(axis=0)
Out[89]:
In [90]:
df.mean()
Out[90]:
In [77]:
np.std?
In [53]:
#df.ix[:, 6:8].plot(kind = 'density', figsize=(9,9));
df.plot(kind = 'density', figsize=(9,9), logx=True);
In [69]:
df.ix[:,:4].boxplot();
df.ix[:,0:8].boxplot();
In [70]:
dfClean = df[abs(df)<200]
In [74]:
dfClean.dropna(inplace=True)
In [76]:
len(dfClean)
Out[76]:
In [91]:
len(df)
Out[91]:
In [90]:
#dfClean.plot(kind = 'density', figsize=(9,9), logx=True);
#dfClean[[0,9,14]].plot(kind = 'density', figsize=(9,9), logx=False);
df.hist(figsize=(9,9));
In [80]:
dfClean.mean()
Out[80]:
In [ ]:
In [38]:
np.isnan(np.inf)
Out[38]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: